home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Sun Solutions 1997 April to September
/
Sun Solutions CD - APR '97 - SEP '97 (704-3778-12 Rev. H)(Sun Microsystems, Inc.)(1997).iso
/
products
/
.wais
/
wais_SunSolutions
/
UPDATE_INDEX.bkp
< prev
next >
Wrap
Text File
|
1995-11-15
|
3KB
|
75 lines
#!/bin/sh
#
# little script to index the file tree. We are using find instead of the
# recursive option to waisindex because the waisindex option doesn't work
# well.
#
# HEY! - this script runs IN the wais data directory
########################################################################
# variables:
#
# INDEX_NAME - the name of the index database. it should match the name
# in the /cgi-bin/newwais.pl file ($src).
# HTTP_SERVER - server hostname
# DOCROOT - root directory that httpd is serving out of.
# DIRECTORIES - list of all the directories to index
#
#
# NOTE: just changing the variable is not enough. you must change all the
# pathnames in the file to your installation specifics.
# Path prefix of the index database; the .syn and .idxable files used
# by this script are named after it.
INDEX_NAME='/opt/db/wais/catalyst_catalog'
# Host name inserted into the http:// URLs handed to the indexer.
HTTP_SERVER='pinatubo'
# Root of the tree that is searched for documents to index.
#DOCROOT="$CD_MOUNT/var/opt/WWW/NCSA/htdocs/CCx86-sparc"
DOCROOT='/opt/db/wais-src'
# Category directories, four per line. Each continuation line ends in
# " \" so the words stay separated by exactly one blank, and the value
# keeps one leading and one trailing blank.
# NOTE(review): nothing in the visible part of this script reads
# DIRECTORIES - confirm whether anything else depends on it.
DIRECTORIES=" \
Arch_Eng Elec_Pub Mech_Eng Soft_Eng \
Artif_Intell Fin_Serv Medical Storage_Dev \
Business Geo_Inf_Sys Multimedia Sys_Admin \
Client_Serv Graphics_Imaging Networking Sys_Int \
Consult_Pub Hard_Periph Oil_Gas Telecomm \
Desktop Horiz_Tools Public_Safety Transportation \
Doc_Image_Man Info_Man Research Utilities \
Ed_Comp_Train Legal Retail_Dist \
Elec_Design_Auto Manufacturing Signal_Proc \
"
########################################################################
# Get rid of the temporary file lists left by an earlier run (the glob
# covers both $INDEX_NAME.idxable and $INDEX_NAME.notidxable). If a
# synonym file does not exist yet, create a dummy one.
#
# Shouldn't need to worry about this section
#
# $INDEX_NAME is quoted everywhere so that a path containing blanks or
# glob characters stays one word; only the ".*idxable" suffix is left
# unquoted so the shell still expands it.
rm -f "$INDEX_NAME".*idxable
if [ ! -f "$INDEX_NAME.syn" ]
then
    # Quoted delimiter: the text below is written out literally, in a
    # single write instead of five separate appends.
    cat > "$INDEX_NAME.syn" <<'EOF'
# synonym file. form is:
# word syn0 syn1 ...
# e.g.
# spam pork-shoulder yummy
dummy dummy
EOF
fi
########################################################################
# Use find to add the name of every *.html file below $DOCROOT to the
# temp file $INDEX_NAME.idxable; -follow makes find descend through
# symbolic links. If you add more file types (e.g. .gif is a file type)
# you'll probably want to update /cgi-bin/newwais.pl in your httpd
# directory so the search result is pretty.
#
# The list is appended to, so any list from an earlier run must already
# have been removed. Both expansions are quoted: a path with blanks is
# passed as one word, and an empty DOCROOT makes find fail instead of
# dropping out of the command line.
find "$DOCROOT" -follow -name "*.html" -print >> "$INDEX_NAME.idxable"
########################################################################
# Index the files using the temp file as input (redirected to the
# indexer's standard input). The URL substitution is a feature of
# freeWAIS .202 and up. It transforms the filename into the correct URL
# so that relative URLs work. The general form is
#   -t URL <what to strip off the front> <what to add to the front>
#
# notes:
#
# * use -a on the subsequent index runs to keep appending to the index file
# * -nocontents tells the indexer to only use the filename...the file
#   contents are ignored
# * the strip prefix /opt/db is hard-coded here; it has to be edited by
#   hand when DOCROOT is moved
# * ./waisindex is looked up in the current directory, so this must be
#   run from the directory that holds it
#
# Expansions are quoted so each one reaches waisindex as a single
# argument even if it contains blanks or glob characters.
./waisindex -d "$INDEX_NAME" -export -t URL /opt/db "http://$HTTP_SERVER" \
    -stdin < "$INDEX_NAME.idxable"
#./waisindex -a -nocontents -d $INDEX_NAME -export -t URL $CD_MOUNT/var/opt/WWW/NCSA/htdocs http://$HTTP_SERVER -stdin < $INDEX_NAME.notidxable